Loading Necessary Packages

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load spatial data
require(sf) || install.packages("sf", dependencies = TRUE)
## Loading required package: sf
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
## [1] TRUE
library(sf)

require(mapview) || install.packages("mapview", dependencies = TRUE)
## Loading required package: mapview
## [1] TRUE
library(mapview)
mapviewOptions(fgb = FALSE)

require(leafsync) || install.packages("leafsync", dependencies = TRUE)
## Loading required package: leafsync
## [1] TRUE
library(leafsync)

require(maps) || install.packages("maps", dependencies = TRUE)
## Loading required package: maps
## 
## Attaching package: 'maps'
## 
## The following object is masked from 'package:purrr':
## 
##     map
## [1] TRUE
library(maps)

Combine Person/Household Data

PersonData <- read_rds('../data/PersonData_111A.Rds')
HHData <- read_rds('../data/HHData_111A.Rds')
hh_bgDensity <- read_rds('../data/hh_bgDensity.Rds')

personHHData <- left_join(PersonData, HHData) %>%
  left_join(hh_bgDensity)
## Joining with `by = join_by(hhid)`
## Joining with `by = join_by(hhid)`
county_shp <- st_read("../data/counties/counties.shp")
## Reading layer `counties' from data source 
##   `/Users/Rebecca/PSTAT 197/vignette-householdclassification/data/counties/counties.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 58 features and 17 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -124.3926 ymin: 32.53578 xmax: -114.1312 ymax: 42.00952
## Geodetic CRS:  WGS 84

Quick Plot

plot(county_shp["NAME"])  

The unique colors correspond to the unique names of each county.

List Variable Names

names(personHHData)
##  [1] "hhid"               "pnum"               "Male"              
##  [4] "Age"                "persHisp"           "persWhite"         
##  [7] "persAfricanAm"      "persNativeAm"       "persAsian"         
## [10] "persPacIsl"         "persOthr"           "persDKrace"        
## [13] "persRFrace"         "bornUSA"            "DriverLic"         
## [16] "TransitPass"        "Employed"           "WorkFixedLoc"      
## [19] "WorkHome"           "WorkNonfixed"       "WorkDaysWk"        
## [22] "TypicalHoursWk"     "FlexSched"          "FlexPrograms"      
## [25] "Disability"         "DisLicensePlt"      "TransitTripsWk"    
## [28] "WalkTripsWk"        "BikeTripsWk"        "Student"           
## [31] "WorkMode"           "SchoolMode"         "EducationCompl"    
## [34] "workday"            "Walk_Dist"          "Bike_Dist"         
## [37] "DriveAlone_Dist"    "Driveothers_Dist"   "Passenger_Dist"    
## [40] "Plane_Dist"         "Allothers_Dist"     "Walk_trips"        
## [43] "Bike_trips"         "DriveAlone_trips"   "Driveothers_trips" 
## [46] "Passenger_trips"    "Plane_trips"        "Allothers_trips"   
## [49] "Sum_Trips"          "Sum_PMT"            "CTFIP"             
## [52] "County"             "MPO"                "City"              
## [55] "DOW"                "HH_size"            "HH_nTrips"         
## [58] "HH_nEmployees"      "HH_nStudents"       "HH_nLicenses"      
## [61] "HH_nCars"           "HH_nBikes"          "HH_income"         
## [64] "HH_anyTransitRider" "HH_homeType"        "HH_homeowner"      
## [67] "HH_isHispanic"      "HH_intEnglish"      "bg_density"        
## [70] "bg_group"

Interactive Map

Aggregating CHTS traits to county level and joining to shapefile

Taking some of the CHTS person and household characteristics and aggregating them to the county level so we can map the traits of our CHTS respondents county-by-county.

Get the total count of how many people in our survey come from the various counties of California:

personHHData %>% 
  group_by(CTFIP, County) %>% # added County to this grouping so we can see the county names
  summarise(count=n())
## `summarise()` has grouped output by 'CTFIP'. You can override using the
## `.groups` argument.
## # A tibble: 58 × 3
## # Groups:   CTFIP [58]
##    CTFIP County       count
##    <dbl> <chr>        <int>
##  1  6001 Alameda       3531
##  2  6003 Alpine          44
##  3  6005 Amador         339
##  4  6007 Butte          774
##  5  6009 Calaveras      338
##  6  6011 Colusa         216
##  7  6013 Contra Costa  2951
##  8  6015 Del Norte      412
##  9  6017 El Dorado      863
## 10  6019 Fresno        2669
## # ℹ 48 more rows

We get the county-by-county means of all the variables of interest. Taking the means instead of just getting sums accounts for the fact that there are not equal amounts of people surveyed from each county. We also create a new column count to this dataset so we can carry over the counts of how many people were surveyed from each county.

prhh_aggreg <- personHHData %>% 
  group_by(County, CTFIP) %>%
  mutate(count = n()) %>% # the new column called 'count'
  summarise_at(vars(-hhid, -pnum, -bg_group), mean)
## Warning: There were 348 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `WorkMode = (function (x, ...) ...`.
## ℹ In group 1: `County = "Alameda"`, `CTFIP = 6001`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 347 remaining warnings.

Join datasets on CTFIP (county ID) variable

county_prhh_shp <- county_shp %>% left_join(prhh_aggreg)
## Joining with `by = join_by(CTFIP)`

Newly-created county_prhh_shp is also an sf object so it has spatial attributes and can be mapped.

Map of the percentage of CHTS respondents born in the USA per county.

mapview(county_prhh_shp, # the dataset to use 
        zcol = "bornUSA", # tells it which column to map
        legend = TRUE, # if FALSE, it won't show the legend 
        label = as.character(county_prhh_shp$NAME), # tells it the column whose value you want to appear when you hover over a shape with your mouse
        popup = leafpop::popupTable(x = county_prhh_shp, zcol = c("bornUSA", "count"))  # determines what is included in the popup window when you click on a shape
        )

Notice that the percentage of respondents who were born in the USA increases as you look at more northern counties. However, also notice that more urban counties have lower percentages, regardless of their position in the state.

Lattice of Interactive Maps

Group the data we have into 4 parts based on their bg_group

4 maps for each residential location type

county_bg_aggreg <- personHHData %>% 
  group_by(County, CTFIP, bg_group) %>%  # group by county, CTFIP, and also bg_group
  mutate(count = n()) %>% 
  summarise_at(vars(-hhid, -pnum), mean)
## Warning: There were 930 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `WorkMode = (function (x, ...) ...`.
## ℹ In group 1: `County = "Alameda"`, `CTFIP = 6001`, `bg_group = Urban`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 929 remaining warnings.
county_bg_shp <- county_shp %>% 
  merge(data.frame(bg_group = c("Urban", "Suburban", "Exurban", "Rural"))) %>% 
  left_join(county_bg_aggreg)
## Joining with `by = join_by(CTFIP, bg_group)`

Sum Trips by Residential Area

urban_TripMap <-  mapview(filter(county_bg_shp, bg_group == "Urban"),
                          zcol = "Sum_Trips", legend = TRUE, popup = NULL,
                          layer.name = "Urban Trips")

suburb_TripMap <- mapview(filter(county_bg_shp, bg_group == "Suburban"),
                          zcol = "Sum_Trips", legend = TRUE, popup = NULL,
                          layer.name = "Suburban Trips")

exurb_TripMap <- mapview(filter(county_bg_shp, bg_group == "Exurban"),
                         zcol = "Sum_Trips", legend = TRUE, popup = NULL,
                         layer.name = "Exurban Trips")

rural_TripMap <- mapview(filter(county_bg_shp, bg_group == "Rural"),
                         zcol = "Sum_Trips", legend = TRUE, popup = NULL,
                         layer.name = "Rural Trips")
latticeview(urban_TripMap, suburb_TripMap, exurb_TripMap, rural_TripMap, sync = "all")

Sum PMT/Distance by Residential Area

urban_PMTMap <- mapview(filter(county_bg_shp, bg_group == "Urban"),
                        zcol = "Sum_PMT", legend = TRUE, popup = NULL,
                        layer.name = "Urban PMT")

suburb_PMTMap <- mapview(filter(county_bg_shp, bg_group == "Suburban"),
                         zcol = "Sum_PMT", legend = TRUE, popup = NULL,
                         layer.name = "Suburban PMT")

exurb_PMTMap <- mapview(filter(county_bg_shp, bg_group == "Exurban"),
                        zcol = "Sum_PMT", legend = TRUE, popup = NULL,
                        layer.name = "Exurban PMT")

rural_PMTMap <- mapview(filter(county_bg_shp, bg_group == "Rural"),
                        zcol = "Sum_PMT", legend = TRUE, popup = NULL,
                        layer.name = "Rural PMT")
latticeview(urban_PMTMap, suburb_PMTMap, exurb_PMTMap, rural_PMTMap, sync = "all")

Static Map

county <- ggplot2::map_data("county", region = "california") # get the CA county data

county_bg <- merge(county, data.frame(bg_group = c("Urban", "Suburban", "Exurban", "Rural")))

county_bg_all <- county_bg_aggreg %>% 
  mutate(subregion = tolower(County)) %>% 
  full_join(county_bg, by = c("subregion", "bg_group"))
ggplot(county_bg_all) +
  geom_polygon(aes(x = long, y = lat, group = subregion, fill = Sum_PMT), colour = "white") +
  scale_fill_distiller(palette = "YlGnBu", direction = 1) +
  facet_wrap(vars(bg_group), nrow = 2) +  # multi-panel plots using facet_wrap(), plot in 2 rows
  ggtitle("Total PMT in California at County-level") + 
  theme_void() +
  theme(legend.position="bottom")